##This module implements the identification of an LSTM model for a sandy clay loam field
#Imports 
import sys
sys.path.append("core_mz1")
import pandas as pd
import copy
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from lstm_equations import ObtainLstmModel
from model_params import ManagementZone_1
soilPars = ManagementZone_1()
#Load the data
df=pd.read_csv("./core_mz1/training_data_mz1.csv") #Data has been resampled to a time frame of 1 day
dataset=df.iloc[:,].values
#Scale the training data using the min-max scaling approach
sc= MinMaxScaler(feature_range=(0,1))
dataset_scaled = sc.fit_transform(dataset)

#Extract the scaling parameters
data_min = sc.data_min_
data_max = sc.data_max_

#Split the original data into training, validation, and testing data sets
data_train = dataset_scaled[:int(0.95*len(dataset_scaled)),:]
data_validation  = dataset_scaled[int(0.95*len(dataset_scaled)):int(0.97*len(dataset_scaled)),:]
data_test = dataset_scaled[int(0.97*len(dataset_scaled)):,:]

#Some metrics for the LSTM model
sequenceLength = 5 # This value can be changed
numberOfFeatures = 6 # Features are the initial root zone moisture, irrigation rate, crop coefficient, lai_index and reference evapotranspiration
numberOfOutputs = 1 # Represents the successor state

#Prepare the training data
X_train, y_train = [], []
for i in range(sequenceLength, len(data_train)-sequenceLength):
    X_train.append(data_train[i-sequenceLength:i,0:numberOfFeatures])
    y_train.append(data_train[i:i+numberOfOutputs,0])
    pass

X_train, y_train = np.array(X_train), np.array(y_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], numberOfFeatures)
y_train = y_train.reshape(y_train.shape[0], y_train.shape[1], numberOfOutputs)

#Prepare the validation data
X_val, y_val = [], []
for i in range(sequenceLength, len(data_validation)-sequenceLength):
    X_val.append(data_validation[i-sequenceLength:i,0:numberOfFeatures])
    y_val.append(data_validation[i:i+numberOfOutputs,0])
    pass
X_val, y_val = np.array(X_val), np.array(y_val) 
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], numberOfFeatures)
y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], numberOfOutputs)

#Prepare the test data for validation of the RNN model
X_test, y_test = [], []
for i in range(sequenceLength, len(data_test)-sequenceLength):
    X_test.append(data_test[i-sequenceLength:i,0:numberOfFeatures])
    y_test.append(data_test[i:i+numberOfOutputs,0])
    pass
X_test, y_test = np.array(X_test), np.array(y_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], numberOfFeatures)
y_test = y_test.reshape(y_test.shape[0], y_test.shape[1], numberOfOutputs)

### ---------- Training the LSTM model [START] ----------
#Specify the architecture of the network
model=Sequential()
model.add(LSTM(units=400, input_shape=(sequenceLength,numberOfFeatures), return_sequences=True)) # The number of units as well as the number of layers can be changed
model.add(LSTM(units=400, return_sequences=False))
model.add(Dense(numberOfOutputs))
model.compile(loss='mse', optimizer='adam')
#Train the LSTM model
history = model.fit(X_train, y_train, epochs=40, batch_size=20, validation_data=(X_val, y_val)) # Epoch number and batch size can be changed

### ---------- Training the LSTM model [END] ----------
#Plot the training and validation losses to check for under/over-fitting
plt.figure(1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.tight_layout()
plt.savefig('./test_model_results/training_stats_vrd.pdf')
plt.show()
plt.close()


#######---------------------------------Extract the weights and baises of the identified LSTM [ START ]---------------------------------------------------
#Layer 1
#The number of units
units_1 = int(model.layers[0].trainable_weights[0].shape[1]/4)
W_1=model.layers[0].get_weights()[0] # The Kernel, the weighting matrix for the input
U_1=model.layers[0].get_weights()[1] # The recurrent kernel, the weighting matrix for the hidden states
b_1=model.layers[0].get_weights()[2] # The bias matrix
#Layer 2
#The number of units
units_2 = int(model.layers[1].trainable_weights[0].shape[1]/4)
W_2=model.layers[1].get_weights()[0] # The Kernel, the weighting matrix for the input
U_2=model.layers[1].get_weights()[1] # The recurrent kernel, the weighting matrix for the hidden states
b_2=model.layers[1].get_weights()[2] # The bias matrix
#The Dense layer
V_m=model.layers[2].get_weights()[0]
b_m=model.layers[2].get_weights()[1]
######---------------------------------Extract the weights and obtain the mathematical expression of the LSTM [ END ]---------------------------------------------------


### ---------- Test the performance of the LSTM model [START] ----------
#One step ahead prediction setting
predicted_trajectory=model.predict(X_test)
y_test = y_test.reshape(y_test.shape[0],y_test.shape[1])
y_test_unscaled = y_test*(data_max[0]-data_min[0]) + data_min[0]
predicted_trajectory_unscaled = predicted_trajectory*(data_max[0]-data_min[0]) + data_min[0]

#Generate a plot for visualization purposes
plt.figure(2)
plt.plot(predicted_trajectory_unscaled[:],color='red',linestyle='dashdot', label='Predicted Trajectory')
plt.plot(y_test_unscaled[:],color ='blue', linestyle='dashdot', label='Actual trajectory')
plt.title('LSTM model performance - Single step ahead prediction')
plt.legend()
plt.tight_layout()
plt.savefig('./test_model_results/performance_ssap_vol_vrd.pdf')
plt.show()
plt.close()

#Multi step ahead prediction setting, i.e using the identified model recrursively. 
x_test_MSAP = copy.deepcopy(X_test)
x_test_2D = x_test_MSAP.reshape(x_test_MSAP.shape[0], -1)
x_test_2D = x_test_2D[:,0:numberOfFeatures]
initial_input=x_test_2D[0:sequenceLength]
y_test_MSAP = []
y_test_MSAP.append(ObtainLstmModel(W_1, U_1, b_1, W_2, U_2, b_2, V_m, b_m, units_1, units_2, sequenceLength, initial_input))
for i in range(1, len(x_test_2D)-sequenceLength+1):
    print(i)
    current_input = x_test_2D[i:i+sequenceLength]
    current_input[-1,0] = y_test_MSAP[i-1]
    x_test_2D[i:i+sequenceLength] = current_input
    y_test_MSAP.append(ObtainLstmModel(W_1, U_1, b_1, W_2, U_2, b_2, V_m, b_m, units_1, units_2, sequenceLength, current_input))
    pass
y_test_MSAP = np.array(y_test_MSAP)
y_test_MSAP_unscaled = y_test_MSAP*(data_max[0]-data_min[0]) + data_min[0]


plt.figure(4) 
plt.plot(y_test_MSAP_unscaled[:100], color='red', label='Predicted Trajectory')
plt.plot(y_test_unscaled[:100], color ='blue', linestyle='dashed', label='Actual Trajectory')
plt.title('LSTM model performance - Multi-step-ahead prediction')
plt.ylabel("$\theta$")
plt.xlabel("Time (days)")
plt.tight_layout()
plt.legend()
plt.tight_layout()
plt.savefig('./test_model_results/performance_msap_vol_vrd.pdf')
plt.show()
plt.close()

    ## ---------- Test the performance of the LSTM model [END] ----------
#######Save the weights and the scaling information
np.savetxt('./test_model_weights/K_1_mz1_vrd.txt', W_1)
np.savetxt('./test_model_weights/RK_1_mz1_vrd.txt', U_1)
np.savetxt('./test_model_weights/BR_1_mz1_vrd.txt', b_1)
np.savetxt('./test_model_weights/K_2_mz1_vrd.txt', W_2)
np.savetxt('./test_model_weights/RK_2_mz1_vrd.txt', U_2)
np.savetxt('./test_model_weights/BR_2_mz1_vrd.txt', b_2)
np.savetxt('./test_model_weights/KD_mz1_vrd.txt', V_m)
np.savetxt('./test_model_weights/BD_mz1_vrd.txt', b_m)
np.savetxt('./test_model_weights/data_min_mz1_vrd.txt', data_min)
np.savetxt('./test_model_weights/data_max_mz1_vrd.txt', data_max)
